{
long ret = 0;
block_io_op_t op;
+ struct task_struct *p = current;
if (copy_from_user(&op, u_block_io_op, sizeof(op)))
return -EFAULT;
case BLOCK_IO_OP_SIGNAL:
/* simply indicates there're reqs outstanding => add current to list */
- add_to_blkdev_list_tail(current);
+ add_to_blkdev_list_tail(p);
maybe_trigger_io_schedule();
break;
case BLOCK_IO_OP_ATTACH_VBD:
/* attach a VBD to a given domain; caller must be privileged */
- if(!IS_PRIV(current))
+ if( !IS_PRIV(p) )
return -EPERM;
ret = vbd_attach(&op.u.attach_info);
break;
+ case BLOCK_IO_OP_RESET:
+ /* Avoid a race with the tasklet. */
+ remove_from_blkdev_list(p);
+ if ( p->blk_req_cons != p->blk_resp_prod )
+ {
+ /* Interface isn't quiescent. */
+ ret = -EINVAL;
+ }
+ else
+ {
+ p->blk_req_cons = p->blk_resp_prod = 0;
+ ret = 0;
+ }
+ break;
+
default:
ret = -ENOSYS;
}
#define NETOP_PUSH_BUFFERS 0 /* Notify Xen of new buffers on the rings. */
#define NETOP_FLUSH_BUFFERS 1 /* Flush all pending request buffers. */
-
+#define NETOP_RESET_RINGS 2 /* Reset ring indexes on a quiescent vif. */
typedef struct tx_req_entry_st
{
/* Block I/O trap operations and associated structures.
*/
-#define BLOCK_IO_OP_SIGNAL 0 // let xen know we have work to do
-#define BLOCK_IO_OP_ATTACH_VBD 1 // attach a VBD to a given domain
-
+#define BLOCK_IO_OP_SIGNAL 0 /* let xen know we have work to do */
+#define BLOCK_IO_OP_ATTACH_VBD 1 /* attach a VBD to a given domain */
+#define BLOCK_IO_OP_RESET 2 /* reset ring indexes on quiescent i/f */
typedef struct _extent {
u16 raw_device;
typedef struct _vbd_attach {
int domain;
- u16 mode; // read-only or read-write
- u16 device; // how this domain refers to this VBD
- int nr_extents; // number of extents in the VBD
- extent_t *extents; // pointer to /array/ of extents
+ u16 mode; /* read-only or read-write */
+ u16 device; /* how this domain refers to this VBD */
+ int nr_extents; /* number of extents in the VBD */
+ extent_t *extents; /* pointer to /array/ of extents */
} vbd_attach_t;
unsigned long cmd;
union
{
- long signal_val_unused;
+ /* no entry for BLOCK_IO_OP_SIGNAL */
vbd_attach_t attach_info;
+ /* no entry for BLOCK_IO_OP_RESET */
}
u;
} block_io_op_t;
ret = flush_bufs_for_vif(vif);
break;
+ case NETOP_RESET_RINGS:
+ /* We take the tx_lock to avoid a race with get_tx_bufs. */
+ spin_lock_irq(&vif->tx_lock);
+ if ( (vif->rx_req_cons != vif->rx_resp_prod) ||
+ (vif->tx_req_cons != vif->tx_resp_prod) )
+ {
+ /* The interface isn't quiescent. */
+ ret = -EINVAL;
+ }
+ else
+ {
+ vif->rx_req_cons = vif->rx_resp_prod = 0;
+ vif->tx_req_cons = vif->tx_resp_prod = 0;
+ ret = 0;
+ }
+ spin_unlock_irq(&vif->tx_lock);
+ break;
+
default:
ret = -EINVAL;
break;
#define XLBLK_RESPONSE_IRQ _EVENT_BLKDEV
#define DEBUG_IRQ _EVENT_DEBUG
+#define STATE_ACTIVE 0
+#define STATE_SUSPENDED 1
+#define STATE_CLOSED 2
+static unsigned int state = STATE_SUSPENDED;
+
static blk_ring_t *blk_ring;
static unsigned int resp_cons; /* Response consumer for comms ring. */
static unsigned int req_prod; /* Private request producer. */
static xen_disk_info_t xlblk_disk_info;
static int xlblk_control_msg_pending;
-#define RING_FULL (BLK_RING_INC(req_prod) == resp_cons)
+/* We plug the I/O ring if the driver is suspended or if the ring is full. */
+#define RING_PLUGGED ((BLK_RING_INC(req_prod) == resp_cons) || \
+ (state != STATE_ACTIVE))
/*
* Request queues with outstanding work, but ring is currently full.
if ( nr_sectors >= (1<<9) ) BUG();
if ( (buffer_ma & ((1<<9)-1)) != 0 ) BUG();
+ if ( state == STATE_CLOSED )
+ return 1;
+
switch ( operation )
{
case XEN_BLOCK_VBD_CREATE:
case XEN_BLOCK_PHYSDEV_GRANT:
case XEN_BLOCK_PHYSDEV_PROBE:
case XEN_BLOCK_PROBE:
- if ( RING_FULL ) return 1;
+ if ( RING_PLUGGED ) return 1;
phys_device = (kdev_t) 0;
sector_number = 0;
DISABLE_SCATTERGATHER();
DISABLE_SCATTERGATHER();
return 0;
}
- else if ( RING_FULL )
+ else if ( RING_PLUGGED )
{
return 1;
}
int i;
unsigned long flags;
struct buffer_head *bh, *next_bh;
+
+ if ( state == STATE_CLOSED )
+ return;
spin_lock_irqsave(&io_request_lock, flags);
while ( nr_pending != 0 )
{
do_xlblk_request(pending_queues[--nr_pending]);
- if ( RING_FULL ) break;
+ if ( RING_PLUGGED ) break;
}
}
}
-int __init xlblk_init(void)
+/*
+ * Reset the shared block-I/O ring: ask Xen to zero its ring indexes
+ * (BLOCK_IO_OP_RESET), (re)map the ring page via the fixmap, zero the
+ * local producer/consumer indexes, then mark the interface ACTIVE.
+ * Called at driver init and again on resume after HYPERVISOR_stop.
+ */
+static void reset_xlblk_interface(void)
{
- int error;
+ block_io_op_t op;
 xlblk_control_msg_pending = 0;
 nr_pending = 0;
- /* This mapping was created early at boot time. */
+ /* Xen refuses the reset if the interface is not quiescent. */
+ op.cmd = BLOCK_IO_OP_RESET;
+ if ( HYPERVISOR_block_io_op(&op) != 0 )
+ printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");
+
+ set_fixmap(FIX_BLKRING_BASE, start_info.blk_ring);
 blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
 blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
-
+
+ /* Make the fresh ring state visible before unplugging the interface. */
+ wmb();
+ state = STATE_ACTIVE;
+}
+
+
+int __init xlblk_init(void)
+{
+ int error;
+
+ reset_xlblk_interface();
+
error = request_irq(XLBLK_RESPONSE_IRQ, xlblk_response_int,
SA_SAMPLE_RANDOM, "blkdev", NULL);
if ( error )
module_init(xlblk_init);
module_exit(xlblk_cleanup);
#endif
+
+
+/*
+ * Quiesce the block interface ahead of HYPERVISOR_stop: mark it SUSPENDED
+ * (RING_PLUGGED then stops new requests entering the ring), poll until all
+ * outstanding requests have been responded to and consumed, mark the
+ * interface CLOSED, and unmap the shared ring page.
+ * NOTE(review): sleeps via schedule_timeout -- process context only.
+ */
+void blkdev_suspend(void)
+{
+ state = STATE_SUSPENDED;
+ wmb(); /* publish SUSPENDED before sampling the ring indexes */
+
+ /* Wait for the ring to drain (our consumer catches the producer). */
+ while ( resp_cons != blk_ring->req_prod )
+ {
+ barrier();
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(1);
+ }
+
+ wmb();
+ state = STATE_CLOSED;
+ wmb();
+
+ /* Ring is idle and CLOSED: safe to tear down the mapping. */
+ clear_fixmap(FIX_BLKRING_BASE);
+}
+
+
+/* Resume after HYPERVISOR_stop: remap/reset the ring and go ACTIVE again. */
+void blkdev_resume(void)
+{
+ reset_xlblk_interface();
+}
static struct list_head dev_list;
-/*
- * Needed because network_close() is not properly implemented yet. So
- * an open after a close needs to do much less than the initial open.
- */
-static int opened_once_already = 0;
-
struct net_private
{
struct list_head list;
struct net_device_stats stats;
atomic_t tx_entries;
unsigned int rx_resp_cons, tx_resp_cons, tx_full;
+ unsigned int net_ring_fixmap_idx;
net_ring_t *net_ring;
net_idx_t *net_idx;
spinlock_t tx_lock;
unsigned int rx_bufs_to_notify;
+#define STATE_ACTIVE 0
+#define STATE_SUSPENDED 1
+#define STATE_CLOSED 2
+ unsigned int state;
+
/*
* {tx,rx}_skbs store outstanding skbuffs. The first entry in each
* array is an index into a chain of free entries.
static int network_open(struct net_device *dev)
{
 struct net_private *np = dev->priv;
- int i, error = 0;
+ int i;
- if ( opened_once_already )
- {
- memset(&np->stats, 0, sizeof(np->stats));
- netif_start_queue(dev);
- return 0;
- }
+ /* Ask Xen to zero the ring indexes; refused if the vif isn't quiescent. */
+ if ( HYPERVISOR_net_io_op(NETOP_RESET_RINGS, np->idx) != 0 )
+ printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n");
+
+ /* (Re)map this vif's shared ring into our per-device fixmap slot. */
+ set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx,
+ start_info.net_rings[np->idx]);
+ np->net_ring = (net_ring_t *)fix_to_virt(
+ FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
+ np->net_idx = &HYPERVISOR_shared_info->net_idx[np->idx];
 np->rx_bufs_to_notify = 0;
 np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
 for ( i = 0; i < RX_RING_SIZE; i++ )
 np->rx_skbs[i] = (void *)(i+1);
- error = request_irq(NET_IRQ, network_interrupt,
- SA_SAMPLE_RANDOM, "network", dev);
- if ( error )
- {
- printk(KERN_WARNING "%s: Could not allocate network interrupt\n",
- dev->name);
- goto fail;
- }
-
- error = request_irq(_EVENT_DEBUG, dbg_network_int, SA_SHIRQ,
- "debug", dev);
- if ( error )
- {
- printk(KERN_WARNING "%s: Non-fatal error -- no debug interrupt\n",
- dev->name);
- }
+ /* Publish the re-initialised ring state before going ACTIVE. */
+ wmb();
+ np->state = STATE_ACTIVE;
 network_alloc_rx_buffers(dev);
- printk("XenoLinux Virtual Network Driver installed as %s\n", dev->name);
-
 netif_start_queue(dev);
 MOD_INC_USE_COUNT;
- opened_once_already = 1;
-
 return 0;
-
- fail:
- kfree(np);
- return error;
}
if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) )
{
np->tx_full = 0;
- netif_wake_queue(dev);
+ if ( np->state == STATE_ACTIVE )
+ netif_wake_queue(dev);
}
}
struct sk_buff *skb;
unsigned int end = RX_RING_ADD(np->rx_resp_cons, RX_MAX_ENTRIES);
- if ( (i = np->net_idx->rx_req_prod) == end )
+ if ( ((i = np->net_idx->rx_req_prod) == end) ||
+ (np->state != STATE_ACTIVE) )
return;
do {
}
-static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs)
+static inline void _network_interrupt(struct net_device *dev)
{
+ struct net_private *np = dev->priv;
unsigned int i;
unsigned long flags;
- struct net_device *dev = (struct net_device *)dev_id;
- struct net_private *np = dev->priv;
struct sk_buff *skb;
rx_resp_entry_t *rx;
+
+ if ( np->state == STATE_CLOSED )
+ return;
spin_lock_irqsave(&np->tx_lock, flags);
network_tx_buf_gc(dev);
}
+/*
+ * Shared NET_IRQ handler for all vifs: walk the global device list and
+ * run the per-device handler on each registered interface.
+ */
+static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs)
+{
+ struct list_head *ent;
+ struct net_private *np;
+ list_for_each ( ent, &dev_list )
+ {
+ np = list_entry(ent, struct net_private, list);
+ _network_interrupt(np->dev);
+ }
+}
+
+
+/*
+ * Quiesce and tear down one vif: suspend it, flush pending buffers via
+ * Xen, poll until both rings drain, then mark it CLOSED and unmap its
+ * shared ring. NOTE(review): sleeps -- process context only.
+ */
int network_close(struct net_device *dev)
{
- netif_stop_queue(dev);
+ struct net_private *np = dev->priv;
+
+ np->state = STATE_SUSPENDED;
+ wmb(); /* publish SUSPENDED before flushing/sampling the rings */
+
+ netif_stop_queue(np->dev);
+
+ HYPERVISOR_net_io_op(NETOP_FLUSH_BUFFERS, np->idx);
+
+ /* Wait until every outstanding rx and tx request has been responded. */
+ while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) ||
+ (np->tx_resp_cons != np->net_idx->tx_req_prod) )
+ {
+ barrier();
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(1);
+ }
+
+ wmb();
+ np->state = STATE_CLOSED;
+ wmb();
+
+ /* Now no longer safe to take interrupts for this device. */
+ clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
+
+ MOD_DEC_USE_COUNT;
+
 return 0;
}
if ( start_info.dom_id == 0 )
(void)register_inetaddr_notifier(¬ifier_inetdev);
+ err = request_irq(NET_IRQ, network_interrupt,
+ SA_SAMPLE_RANDOM, "network", NULL);
+ if ( err )
+ {
+ printk(KERN_WARNING "Could not allocate network interrupt\n");
+ goto fail;
+ }
+
+ err = request_irq(_EVENT_DEBUG, dbg_network_int, SA_SHIRQ, "debug", NULL);
+ if ( err )
+ printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
+
for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
{
if ( start_info.net_rings[i] == 0 )
goto fail;
}
- set_fixmap(FIX_NETRING0_BASE+fixmap_idx, start_info.net_rings[i]);
-
np = dev->priv;
- np->net_ring = (net_ring_t *)fix_to_virt(FIX_NETRING0_BASE+fixmap_idx);
- np->net_idx = &HYPERVISOR_shared_info->net_idx[i];
- np->idx = i;
+ np->state = STATE_CLOSED;
+ np->net_ring_fixmap_idx = fixmap_idx;
+ np->idx = i;
SET_MODULE_OWNER(dev);
dev->open = network_open;
#include <asm/mmu_context.h>
#include <asm/hypervisor.h>
#include <asm/hypervisor-ifs/dom0_ops.h>
+#include <linux/netdevice.h>
+#include <linux/tqueue.h>
/*
* Point at the empty zero page to start with. We map the real shared_info
* Time-to-die callback handling.
*/
+/* _EVENT_DIE handler: initiate an orderly shutdown via ctrl-alt-del. */
-static void time_to_die(int irq, void *unused, struct pt_regs *regs)
+static void die_irq(int irq, void *unused, struct pt_regs *regs)
{
 extern void ctrl_alt_del(void);
 ctrl_alt_del();
}
+/* Register the die handler at boot. */
-static int __init setup_death_event(void)
+static int __init setup_die_event(void)
{
- (void)request_irq(_EVENT_DIE, time_to_die, 0, "die", NULL);
+ (void)request_irq(_EVENT_DIE, die_irq, 0, "die", NULL);
 return 0;
}
-__initcall(setup_death_event);
+__initcall(setup_die_event);
/******************************************************************************
* Stop/pickle callback handling.
*/
-static void time_to_stop(int irq, void *unused, struct pt_regs *regs)
+/*
+ * Deferred STOP handler, run in process context (scheduled from stop_irq):
+ * close all eth interfaces, quiesce the block device, then remap the
+ * shared-info page across HYPERVISOR_stop and bring everything back up.
+ */
+static void stop_task(void *unused)
+{
+ /* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. */
+ extern void blkdev_suspend(void);
+ extern void blkdev_resume(void);
+
+ struct net_device *dev;
+ char name[6]; /* "eth0".."eth9" + NUL */
+ int i;
+
+ /* Close down all Ethernet interfaces. */
+ for ( i = 0; i < 10; i++ )
+ {
+ sprintf(name, "eth%d", i);
+ if ( (dev = dev_get_by_name(name)) == NULL )
+ continue;
+ dev_close(dev);
+ dev_put(dev);
+ }
+
+ blkdev_suspend();
+
+ __cli();
+
+ /* shared_info may live at a different machine address after restore. */
+ clear_fixmap(FIX_SHARED_INFO);
+
+ HYPERVISOR_stop();
+
+ /* We resume here after the domain is unpickled. */
+ set_fixmap(FIX_SHARED_INFO, start_info.shared_info);
+ HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+
+ __sti();
+
+ blkdev_resume();
+
+ /* Bring up all Ethernet interfaces. */
+ for ( i = 0; i < 10; i++ )
+ {
+ sprintf(name, "eth%d", i);
+ if ( (dev = dev_get_by_name(name)) == NULL )
+ continue;
+ dev_open(dev);
+ dev_put(dev);
+ }
+}
+
+static struct tq_struct stop_tq;
+
+/*
+ * _EVENT_STOP interrupt handler: the suspend work in stop_task sleeps,
+ * so defer it to process context via the scheduler task queue.
+ */
+static void stop_irq(int irq, void *unused, struct pt_regs *regs)
+{
+ stop_tq.routine = stop_task;
+ schedule_task(&stop_tq);
+}
static int __init setup_stop_event(void)
{
- (void)request_irq(_EVENT_STOP, time_to_stop, 0, "stop", NULL);
+ (void)request_irq(_EVENT_STOP, stop_irq, 0, "stop", NULL);
return 0;
}
extern char __init_begin, __init_end;
static inline void set_pte_phys (unsigned long vaddr,
- unsigned long phys, pgprot_t flags)
+ unsigned long phys, pgprot_t prot)
{
- pgprot_t prot;
pgd_t *pgd;
pmd_t *pmd;
pte_t *pte;
if (pte_val(*pte))
pte_ERROR(*pte);
- pgprot_val(prot) = pgprot_val(PAGE_KERNEL) | pgprot_val(flags);
-
/* We queue directly, avoiding hidden phys->machine translation. */
queue_l1_entry_update(pte, phys | pgprot_val(prot));
__flush_tlb_one(vaddr);
}
-void __set_fixmap (enum fixed_addresses idx, unsigned long phys,
- pgprot_t flags)
+void __set_fixmap(enum fixed_addresses idx, unsigned long phys,
+ pgprot_t flags)
{
unsigned long address = __fix_to_virt(idx);
printk("Invalid __set_fixmap\n");
return;
}
- set_pte_phys(address, phys, flags);
+ set_pte_phys(address, phys,
+ __pgprot(pgprot_val(PAGE_KERNEL)|pgprot_val(flags)));
+}
+
+/* Remove a fixmap mapping by installing an empty PTE (phys 0, prot 0). */
+void clear_fixmap(enum fixed_addresses idx)
+{
+ set_pte_phys(__fix_to_virt(idx), 0, __pgprot(0));
+}
static void __init fixrange_init (unsigned long start,
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
fixrange_init(vaddr, HYPERVISOR_VIRT_START, init_mm.pgd);
- /* Cheesy: this can probably be moved to the blkdev driver. */
- set_fixmap(FIX_BLKRING_BASE, start_info.blk_ring);
-
/* Switch to the real shared_info page, and clear the dummy page. */
set_fixmap(FIX_SHARED_INFO, start_info.shared_info);
HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
*/
#define set_fixmap_nocache(idx, phys) \
__set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+
+extern void clear_fixmap(enum fixed_addresses idx);
+
/*
* used by vmalloc.c.
*